library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readspss)
library(tableone)
library(ggiraph)
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(patchwork)
##
## Attaching package: 'patchwork'
##
## The following object is masked from 'package:cowplot':
##
## align_plots
library(nlme)
##
## Attaching package: 'nlme'
##
## The following object is masked from 'package:dplyr':
##
## collapse
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Attaching package: 'lme4'
##
## The following object is masked from 'package:nlme':
##
## lmList
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(lattice)
library(effects)
## Use the command
## lattice::trellis.par.set(effectsTheme())
## to customize lattice options for effects plots.
## See ?effectsTheme for details.
library(sjPlot)
##
## Attaching package: 'sjPlot'
##
## The following objects are masked from 'package:cowplot':
##
## plot_grid, save_plot
library(lmerTest)
##
## Attaching package: 'lmerTest'
##
## The following object is masked from 'package:lme4':
##
## lmer
##
## The following object is masked from 'package:stats':
##
## step
library(sandwich)
# Read the SPSS file and build a long-format table: one slice per semester,
# each carrying "*_current" copies of that semester's own measures.
data.file <- read.sav("BARCS_Data_used_for_paper.sav")

# Rows belonging to each semester.
data.sem1 <- filter(data.file, Semester == 1)
data.sem2 <- filter(data.file, Semester == 2)
data.sem3 <- filter(data.file, Semester == 3)
data.sem4 <- filter(data.file, Semester == 4)

# Copy the semester-specific columns into common "*_current" columns.
data.SEM1 <- mutate(
  data.sem1,
  LOG_Avg_Drinks_current = LOG_Avg_Drinks_SEM1,
  LOG_Avg_MJ_current = LOG_Avg_MJ_SEM1,
  Cluster_current = Cluster_SEM1,
  Log_B30_current = Log_B30_Sem1
)
data.SEM2 <- mutate(
  data.sem2,
  LOG_Avg_Drinks_current = LOG_Avg_Drinks_SEM2,
  LOG_Avg_MJ_current = LOG_Avg_MJ_SEM2,
  Cluster_current = Cluster_SEM2,
  Log_B30_current = Log_B30_Sem2
)
data.SEM3 <- mutate(
  data.sem3,
  LOG_Avg_Drinks_current = LOG_Avg_Drinks_SEM3,
  LOG_Avg_MJ_current = LOG_Avg_MJ_SEM3,
  Cluster_current = Cluster_SEM3,
  Log_B30_current = Log_B30_Sem3
)
data.SEM4 <- mutate(
  data.sem4,
  LOG_Avg_Drinks_current = LOG_Avg_Drinks_SEM4,
  LOG_Avg_MJ_current = LOG_Avg_MJ_SEM4,
  Cluster_current = Cluster_SEM4,
  Log_B30_current = Log_B30_Sem4
)

# Stack the four slices in semester order (1, 2, 3, 4).
data.file.long <- rbind(data.SEM1, data.SEM2, data.SEM3, data.SEM4)
# Back-transform the log10 columns via 10^x - 1 (this inverts log10(x + 1);
# presumably how the variables were stored -- TODO confirm against the codebook).
data.file.long <- mutate(
  data.file.long,
  # Alcohol: average drinks
  Avg_Drinks_SEM1 = 10^LOG_Avg_Drinks_SEM1 - 1,
  Avg_Drinks_SEM2 = 10^LOG_Avg_Drinks_SEM2 - 1,
  Avg_Drinks_SEM3 = 10^LOG_Avg_Drinks_SEM3 - 1,
  Avg_Drinks_SEM4 = 10^LOG_Avg_Drinks_SEM4 - 1,
  Avg_Drinks_current = 10^LOG_Avg_Drinks_current - 1,
  # Marijuana: average use
  Avg_MJ_SEM1 = 10^LOG_Avg_MJ_SEM1 - 1,
  Avg_MJ_SEM2 = 10^LOG_Avg_MJ_SEM2 - 1,
  Avg_MJ_SEM3 = 10^LOG_Avg_MJ_SEM3 - 1,
  Avg_MJ_SEM4 = 10^LOG_Avg_MJ_SEM4 - 1,
  Avg_MJ_current = 10^LOG_Avg_MJ_current - 1,
  # B30 measure
  B30_SEM1 = 10^Log_B30_Sem1 - 1,
  B30_SEM2 = 10^Log_B30_Sem2 - 1,
  B30_SEM3 = 10^Log_B30_Sem3 - 1,
  B30_SEM4 = 10^Log_B30_Sem4 - 1,
  B30_current = 10^Log_B30_current - 1
)
### needs some work
# Change relative to the student's previous semester for consumption, GPA and
# cluster membership. The lag is ordered explicitly by Semester so the result
# no longer depends on the row order produced when the semester slices were
# stacked. The first semester of every student has no predecessor and gets NA.
# The result stays grouped by BARCS_ID, as before.
data.file.long <- data.file.long %>%
  group_by(BARCS_ID) %>%
  mutate(
    diff_Avg_Drinks_current = Avg_Drinks_current -
      dplyr::lag(Avg_Drinks_current, 1, order_by = Semester),
    diff_Avg_MJ_current = Avg_MJ_current -
      dplyr::lag(Avg_MJ_current, 1, order_by = Semester),
    diff_B30_current = B30_current -
      dplyr::lag(B30_current, 1, order_by = Semester),
    diff_LOG_Avg_Drinks_current = LOG_Avg_Drinks_current -
      dplyr::lag(LOG_Avg_Drinks_current, 1, order_by = Semester),
    diff_LOG_Avg_MJ_current = LOG_Avg_MJ_current -
      dplyr::lag(LOG_Avg_MJ_current, 1, order_by = Semester),
    diff_GPA = GPA - dplyr::lag(GPA, 1, order_by = Semester),
    diff_LOG_B30_current = Log_B30_current -
      dplyr::lag(Log_B30_current, 1, order_by = Semester),
    # Difference of the numeric cluster codes between consecutive semesters.
    transition_current = as.numeric(Cluster_current) -
      dplyr::lag(as.numeric(Cluster_current), 1, order_by = Semester)
  )
# Centre ("mean_*") and standardise ("std_*") the consumption measures within
# each semester. All statistics use na.rm = TRUE so that a single missing value
# cannot turn a whole semester's centred/standardised column into NA; the
# alcohol columns previously lacked this, unlike the MJ and B30 columns.
# The result stays grouped by Semester; the GPA centring further down is
# computed on that grouping.
data.file.long <- data.file.long %>%
  ungroup() %>%
  group_by(Semester) %>%
  mutate(
    mean_Avg_Drinks = Avg_Drinks_current - mean(Avg_Drinks_current, na.rm = TRUE),
    std_Avg_Drinks = mean_Avg_Drinks / sd(Avg_Drinks_current, na.rm = TRUE),
    mean_LOG_Avg_Drinks = LOG_Avg_Drinks_current -
      mean(LOG_Avg_Drinks_current, na.rm = TRUE),
    std_LOG_Avg_Drinks = mean_LOG_Avg_Drinks /
      sd(LOG_Avg_Drinks_current, na.rm = TRUE),
    mean_Avg_MJ = Avg_MJ_current - mean(Avg_MJ_current, na.rm = TRUE),
    std_Avg_MJ = mean_Avg_MJ / sd(Avg_MJ_current, na.rm = TRUE),
    mean_LOG_Avg_MJ = LOG_Avg_MJ_current - mean(LOG_Avg_MJ_current, na.rm = TRUE),
    std_LOG_Avg_MJ = mean_LOG_Avg_MJ / sd(LOG_Avg_MJ_current, na.rm = TRUE),
    mean_B30 = B30_current - mean(B30_current, na.rm = TRUE),
    std_B30 = mean_B30 / sd(B30_current, na.rm = TRUE),
    mean_LOG_B30 = Log_B30_current - mean(Log_B30_current, na.rm = TRUE),
    std_LOG_B30 = mean_LOG_B30 / sd(Log_B30_current, na.rm = TRUE)
    # GPA is deliberately not centred here: the data contain missing GPAs
    # that were wrongly coded as 0, so that has to wait until they are NA.
    # mean_GPA = GPA - mean(GPA),
    # std_GPA = mean_GPA / sd(GPA)
  )
# Move the identifier, GPA and consumption columns (raw, centred, standardised,
# differenced) to the front, then turn cluster and student id into factors.
# NOTE(review): the table is still grouped by Semester at this point, so
# as.factor() is evaluated per semester group -- confirm the resulting level
# order is what is wanted.
data.file.long <- data.file.long %>%
  relocate(
    BARCS_ID, Semester, GPA, diff_GPA,
    Avg_Drinks_current, mean_Avg_Drinks, std_Avg_Drinks,
    diff_Avg_Drinks_current,
    LOG_Avg_Drinks_current, mean_LOG_Avg_Drinks, std_LOG_Avg_Drinks,
    diff_LOG_Avg_Drinks_current,
    Avg_MJ_current, mean_Avg_MJ, std_Avg_MJ, diff_Avg_MJ_current,
    LOG_Avg_MJ_current, mean_LOG_Avg_MJ, std_LOG_Avg_MJ,
    diff_LOG_Avg_MJ_current
  ) %>%
  mutate(
    Cluster_current = as.factor(Cluster_current),
    BARCS_ID = as.factor(BARCS_ID)
  )
## Inspection subsets used to judge whether GPA = 0 and SAT = 0 are really NAs
# Ids of all students with at least one semester recorded as GPA == 0.
ind.gpa0.vector <- data.file.long %>%
  filter(GPA == 0) %>%
  pull(BARCS_ID) %>%
  unique()

# Every semester row of those students, grouped by student and semester.
ind.gpa0 <- data.file.long %>%
  filter(BARCS_ID %in% ind.gpa0.vector) %>%
  group_by(BARCS_ID, Semester)

# Rows whose SAT total is recorded as 0, together with their GPA.
ind.sat0 <- data.file.long %>%
  ungroup() %>%
  filter(SATTotal == 0) %>%
  select(BARCS_ID, Semester, SATTotal, GPA)
### Recode falsely recorded zeros in GPA / SAT as NA, then centre and
### standardise GPA within each semester.
# The grouping by Semester is stated explicitly: the original code only
# produced per-semester statistics because the table happened to still be
# grouped by Semester from an earlier step. The result stays grouped by
# Semester, exactly as before.
data.file.long <- data.file.long %>%
  group_by(Semester) %>%
  mutate(
    GPA = replace(GPA, GPA == 0, NA),
    SATTotal = replace(SATTotal, SATTotal == 0, NA),
    SATMath = replace(SATMath, SATMath == 0, NA),
    SATVerbal = replace(SATVerbal, SATVerbal == 0, NA),
    SATWriting = replace(SATWriting, SATWriting == 0, NA),
    # Computed from the recoded GPA, i.e. zeros no longer bias mean and sd.
    mean_GPA = GPA - mean(GPA, na.rm = TRUE),
    std_GPA = mean_GPA / sd(GPA, na.rm = TRUE)
  )
### Per-student summaries: number of missing GPAs and average GPA.
# average_GPA uses na.rm = TRUE. GPA contains NAs by now (zeros were recoded),
# so a plain mean() returned NA for every student with at least one missing
# semester, which made sorting the 1-3-NA subsets by average_GPA meaningless.
# Students with no GPA at all get NaN.
# The result stays grouped by BARCS_ID, as before.
data.file.long <- data.file.long %>%
  group_by(BARCS_ID) %>%
  mutate(
    sum.GPAna = sum(is.na(GPA)),
    average_GPA = mean(GPA, na.rm = TRUE)
  )
#data.file.long <- data.file.long %>% arrange(sum.GPAna, average_GPA)
# Replace numeric / code values by readable labels; anything not listed
# becomes NA.
data.file.long <- data.file.long %>%
  mutate(
    Sex = case_match(
      Sex,
      1 ~ "male",
      2 ~ "female",
      .default = NA
    ),
    Cluster_current = case_match(
      Cluster_current,
      "1" ~ "1st.cluster",
      "2" ~ "2nd.cluster",
      "3" ~ "3rd.cluster",
      .default = NA
    ),
    Cluster_SEM1 = case_match(
      Cluster_SEM1,
      "1" ~ "1st.cluster",
      "2" ~ "2nd.cluster",
      "3" ~ "3rd.cluster",
      .default = NA
    ),
    Fager4_binary = case_match(
      Fager4_binary,
      1 ~ "smoker",
      0 ~ "non smoker",
      .default = NA
    ),
    FH_binary = case_match(
      FH_binary,
      0 ~ "negative",
      1 ~ "positive",
      .default = NA
    )
  )
## One subset per number of missing GPAs (0 to 3), each sorted by the student's
## average GPA. Students with 4 NAs have no GPA data at all and are dropped.
subset.0nagpas <- arrange(filter(data.file.long, sum.GPAna == 0), average_GPA)
subset.1nagpas <- arrange(filter(data.file.long, sum.GPAna == 1), average_GPA)
subset.2nagpas <- arrange(filter(data.file.long, sum.GPAna == 2), average_GPA)
subset.3nagpas <- arrange(filter(data.file.long, sum.GPAna == 3), average_GPA)
### Group transition compares only the first semester to the last semester!!!!!!!!!
# Descriptive table of the first-semester rows, stratified by the
# first-semester cluster, with group-comparison tests.
variables.paper.page7table <- c(
  "Age1stround", "SATMath", "SATVerbal", "SATWriting", "GPA", "Parental_SES",
  "STAI_SELF_Total", "BDI_SELF_Total", "Avg_Drinks_SEM1", "Avg_MJ_SEM1",
  "Sex", "Fager4_binary", "FH_binary"
)
catvars.paper.page7table <- c("Sex", "Fager4_binary", "FH_binary")

# The categorical variables are passed as `factorVars` by name; the original
# call relied on an unnamed argument landing in that position.
paper.page7table <- CreateTableOne(
  vars = variables.paper.page7table,
  strata = "Cluster_SEM1",
  data = data.file.long %>% filter(Semester == 1),
  factorVars = catvars.paper.page7table,
  includeNA = TRUE,
  test = TRUE
)
paper.page7table
## Stratified by Cluster_SEM1
## 1st.cluster 2nd.cluster 3rd.cluster
## n 487 463 188
## Age1stround (mean (SD)) 18.32 (0.91) 18.30 (0.73) 18.30 (0.63)
## SATMath (mean (SD)) 541.05 (89.52) 554.98 (90.68) 554.24 (84.78)
## SATVerbal (mean (SD)) 530.63 (91.04) 541.56 (89.33) 541.24 (76.95)
## SATWriting (mean (SD)) 534.41 (90.45) 553.75 (92.03) 544.82 (83.87)
## GPA (mean (SD)) 3.10 (0.67) 3.04 (0.64) 2.71 (0.77)
## Parental_SES (mean (SD)) 12.55 (7.05) 10.23 (5.47) 10.24 (5.76)
## STAI_SELF_Total (mean (SD)) 40.14 (9.87) 39.23 (10.09) 41.46 (10.70)
## BDI_SELF_Total (mean (SD)) 3.33 (4.45) 3.13 (4.44) 4.24 (5.06)
## Avg_Drinks_SEM1 (mean (SD)) 0.40 (0.75) 29.29 (32.22) 54.54 (42.69)
## Avg_MJ_SEM1 (mean (SD)) 0.09 (0.40) 0.42 (0.72) 13.55 (8.13)
## Sex (%)
## female 299 (61.4) 286 (61.8) 87 (46.3)
## male 186 (38.2) 173 (37.4) 100 (53.2)
## NA 2 ( 0.4) 4 ( 0.9) 1 ( 0.5)
## Fager4_binary (%)
## non smoker 459 (94.3) 411 (88.8) 147 (78.2)
## smoker 19 ( 3.9) 42 ( 9.1) 38 (20.2)
## NA 9 ( 1.8) 10 ( 2.2) 3 ( 1.6)
## FH_binary = positive (%) 109 (22.4) 98 (21.2) 49 (26.1)
## Stratified by Cluster_SEM1
## p test
## n
## Age1stround (mean (SD)) 0.896
## SATMath (mean (SD)) 0.049
## SATVerbal (mean (SD)) 0.146
## SATWriting (mean (SD)) 0.007
## GPA (mean (SD)) <0.001
## Parental_SES (mean (SD)) <0.001
## STAI_SELF_Total (mean (SD)) 0.039
## BDI_SELF_Total (mean (SD)) 0.017
## Avg_Drinks_SEM1 (mean (SD)) <0.001
## Avg_MJ_SEM1 (mean (SD)) <0.001
## Sex (%) 0.003
## female
## male
## NA
## Fager4_binary (%) <0.001
## non smoker
## smoker
## NA
## FH_binary = positive (%) 0.397
Checking for potentially missing data
# GPA trajectories of the students in ind.gpa0, one line (and colour) per
# student.
plot.gpa0 <- ind.gpa0 %>%
  ggplot(aes(x = Semester, y = GPA, group = BARCS_ID, colour = BARCS_ID)) +
  geom_line(alpha = .5) +
  theme_bw()

# Show it without the legend (one entry per student would swamp the figure).
plot.gpa0 + theme(legend.position = "none")
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).
# SAT total against GPA for the rows selected into ind.sat0.
ind.sat0 %>%
  ggplot(aes(x = GPA, y = SATTotal)) +
  geom_point() +
  theme_bw()
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Size (rows, columns) of the ind.sat0 subset whose GPA is also recorded as 0.
dim(filter(ind.sat0, GPA == 0))
## [1] 12 4
Considering that almost all students have non-zero GPA data before and after a semester with GPA = 0, it seems likely that many students were wrongly assigned 0 instead of NA here. Additionally, a completed SAT cannot have a point total of zero. It is therefore extremely likely that zero entries mean the SAT data are missing, as most of these students also have a non-zero GPA. Zero entries are subsequently recoded as NA to avoid distorting the later estimates and means (etc.).
The following plots show the composition of the three clusters given the alcohol and marijuana consumption choices. The first plot is a direct replication of the plot at the bottom of page 5, while the second plot represents the cluster allocation given the untransformed alcohol and marijuana consumption. In the third plot, the untransformed variables are shown again; however, only values up to 12 average monthly alcoholic beverages and 8 average monthly MJ uses are displayed (the axis limits used in the code), so that the separation between clusters 1 and 2 becomes visible.
# Cluster scatter plots: alcohol against MJ consumption, coloured by the
# current cluster and facetted by semester.

# Cluster_current was recoded to character labels ("1st.cluster", ...) by the
# case_match() step, so turn it back into a factor for plotting.
data.file.long <- data.file.long %>%
  mutate(
    Cluster_current = as.factor(Cluster_current),
    BARCS_ID = as.factor(BARCS_ID)
  )

# The names must equal the recoded cluster labels. The manual scales match
# named values against the data values; with the former names '1', '2', '3'
# nothing matched, every point got an NA colour, and na.translate = FALSE then
# dropped all of them.
cluster.colors <- c(
  "1st.cluster" = "blue",
  "2nd.cluster" = "green",
  "3rd.cluster" = "red"
)
cluster.title <- "Cluster"

# Expected removals (missing consumption values, points outside the zoomed
# axis limits) are silenced with na.rm = TRUE on the layer. The plots are no
# longer wrapped in suppressWarnings(), which also hid the scale mismatch above.

# Log10-transformed consumption.
plot.page5 <- ggplot(
  data.file.long,
  aes(x = LOG_Avg_MJ_current, y = LOG_Avg_Drinks_current, col = Cluster_current)
) +
  geom_point(na.rm = TRUE) +
  xlab("Average Monthly MJ / Cannabis Use (Log10 Transformed)") +
  ylab("Average Number of Drinks per Month (Log10 Transformed)") +
  labs(colour = "cluster") +
  ggtitle("Cluster alocation given Alcohol & MJ consumption (log10 transformed)") +
  scale_colour_manual(values = cluster.colors, na.translate = FALSE) +
  theme_bw() +
  facet_wrap(~ Semester)
print(plot.page5)

# Untransformed consumption, full range.
plot.page5.nottransformed <- ggplot(
  data.file.long,
  aes(x = Avg_MJ_current, y = Avg_Drinks_current, col = Cluster_current)
) +
  geom_point(alpha = 0.25, na.rm = TRUE) + #ylim(0, 50) +
  xlab("Average Monthly MJ / Cannabis Use (not transformed)") +
  ylab("Average Number of Drinks per Month (not transformed)") +
  labs(colour = "cluster") +
  ggtitle("Cluster alocation given Alcohol & MJ consumption") +
  scale_colour_manual(values = cluster.colors, na.translate = FALSE) +
  theme_bw() +
  facet_wrap(~ Semester)
print(plot.page5.nottransformed)

# Untransformed consumption, zoomed to 0-12 drinks and 0-8 MJ uses.
plot.page5.nottransformed.focused <- ggplot(
  data.file.long,
  aes(x = Avg_MJ_current, y = Avg_Drinks_current, col = Cluster_current)
) +
  geom_point(alpha = 1, na.rm = TRUE) +
  ylim(0, 12) +
  xlim(0, 8) +
  xlab("Average Monthly MJ / Cannabis Use (not transformed)") +
  ylab("Average Number of Drinks per Month (not transformed)") +
  labs(colour = "cluster") +
  ggtitle("Showing the Separation") +
  scale_colour_manual(values = cluster.colors, na.translate = FALSE) +
  theme_bw() +
  facet_wrap(~ Semester)
print(plot.page5.nottransformed.focused)
#cowplot::plot_grid(plot.page5, plot.page5.nottransformed, nrow = 2) ##just not good
It is noteworthy that the separation between clusters 1 and 2 lies at around 3 alcoholic beverages per month on average, though the boundary of the separation varies across semesters.
# Individual GPA trajectories (one line per student), one panel per
# first-semester cluster, coloured by the cluster in the current semester.
# Rows without a first-semester cluster are left out.
gpa.spaghetti <- data.file.long %>%
  filter(!is.na(Cluster_SEM1)) %>%
  ggplot(aes(x = Semester, y = GPA, group = BARCS_ID, colour = Cluster_current)) +
  geom_point(alpha = 0.1) +
  geom_line(alpha = 0.1) +
  facet_wrap(~Cluster_SEM1) +
  scale_colour_manual(values = cluster.colors, na.translate = FALSE) +
  labs(
    x = "Semester",
    y = "Grade Point Average",
    colour = "cluster",
    title = "GPA along original Cluster classification (1. Semester)"
  ) +
  theme_bw()
gpa.spaghetti
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## Warning: Removed 4552 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4552 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Same layout as the GPA spaghetti plot, but showing the change in GPA
# relative to the previous semester (diff_GPA).
gpa.spaghetti.diff <- data.file.long %>%
  filter(!is.na(Cluster_SEM1)) %>%
  ggplot(
    aes(x = Semester, y = diff_GPA, group = BARCS_ID, colour = Cluster_current)
  ) +
  geom_point(alpha = 0.1) +
  geom_line(alpha = 0.1) +
  facet_wrap(~Cluster_SEM1) +
  scale_colour_manual(values = cluster.colors, na.translate = FALSE) +
  labs(
    x = "Semester",
    y = "Change in GPA to previous Semester",
    colour = "cluster",
    title = "Different GPA along original Cluster classification (1. Semester)"
  ) +
  theme_bw()
gpa.spaghetti.diff
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## Warning: Removed 4552 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4552 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Same layout as the GPA spaghetti plot, using the mean-centred GPA (mean_GPA).
gpa.spaghetti.mean <- data.file.long %>%
  filter(!is.na(Cluster_SEM1)) %>%
  ggplot(
    aes(x = Semester, y = mean_GPA, group = BARCS_ID, colour = Cluster_current)
  ) +
  geom_point(alpha = 0.1) +
  geom_line(alpha = 0.1) +
  facet_wrap(~Cluster_SEM1) +
  scale_colour_manual(values = cluster.colors, na.translate = FALSE) +
  labs(
    x = "Semester",
    y = "Grade Point Average, mean adjusted",
    colour = "cluster",
    title = "GPA along original Cluster classification (1. Semester) centered around mean"
  ) +
  theme_bw()
gpa.spaghetti.mean
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## Warning: Removed 4552 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4552 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Same layout as the GPA spaghetti plot, using the standardised GPA (std_GPA).
gpa.spaghetti.std <- data.file.long %>%
  filter(!is.na(Cluster_SEM1)) %>%
  ggplot(
    aes(x = Semester, y = std_GPA, group = BARCS_ID, colour = Cluster_current)
  ) +
  geom_point(alpha = 0.1) +
  geom_line(alpha = 0.1) +
  facet_wrap(~Cluster_SEM1) +
  scale_colour_manual(values = cluster.colors, na.translate = FALSE) +
  labs(
    x = "Semester",
    y = "Standardised Grade Point Average",
    colour = "cluster",
    title = "GPA along original Cluster classification (1. Semester), standardised"
  ) +
  theme_bw()
gpa.spaghetti.std
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## Warning: Removed 4552 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4552 rows containing missing values or values outside the scale range
## (`geom_line()`).
# GPA distribution per semester, split by the first-semester cluster.
# The fill legend is switched off for this panel.
plot.cluster.sem1.GPA <- data.file.long %>%
  ggplot(aes(x = as.factor(Semester), y = GPA)) +
  geom_boxplot(aes(fill = Cluster_SEM1)) +
  scale_fill_manual(values = cluster.colors, na.translate = TRUE) +
  labs(
    x = "Semester",
    y = "Grade Point Average",
    title = "GPA given 1. Sem Cluster"
  ) +
  theme_bw() +
  guides(fill = "none")
#plot.cluster.sem1.GPA

# GPA distribution per semester, split by the cluster in that semester.
plot.cluster.current.GPA <- data.file.long %>%
  ggplot(aes(x = as.factor(Semester), y = GPA)) +
  geom_boxplot(aes(fill = Cluster_current)) +
  scale_fill_manual(values = cluster.colors, na.translate = TRUE) +
  labs(
    x = "Semester",
    y = "Grade Point Average",
    title = "GPA given current Cluster"
  ) +
  guides(fill = guide_legend(title = "Cluster")) +
  theme_bw()
#plot.cluster.current.GPA

# Both panels side by side.
plot.cluster.sem1.GPA + plot.cluster.current.GPA
## Warning: Removed 362 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 362 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# GPA against log10 alcohol consumption with a smoother, one panel per
# semester; the y axis is limited to 0-4.5.
plot.log.alcoholGPA <- data.file.long %>%
  ggplot(aes(x = LOG_Avg_Drinks_current, y = GPA)) +
  geom_point(alpha = 0.3) +
  geom_smooth() +
  ylim(0, 4.5) +
  facet_wrap(~Semester) +
  labs(
    x = "Log10 Alcohol consumption per month during Semester",
    y = "GPA"
  )
plot.log.alcoholGPA
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 362 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 362 rows containing missing values or values outside the scale range
## (`geom_point()`).
# GPA against untransformed alcohol consumption with a smoother, one panel per
# semester; the y axis is limited to 0-4.5.
plot.alcoholGPA <- data.file.long %>%
  ggplot(aes(x = Avg_Drinks_current, y = GPA)) +
  geom_point(alpha = 0.3) +
  geom_smooth() +
  ylim(0, 4.5) +
  facet_wrap(~Semester) +
  labs(
    x = "Alcohol consumption per month during Semester",
    y = "GPA"
  )
plot.alcoholGPA
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 362 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 362 rows containing missing values or values outside the scale range
## (`geom_point()`).
The relationship appears to be nonlinear for the log-transformed alcohol
variable, and somewhat nonlinear for the untransformed variable.
# GPA against MJ consumption (log-transformed and untransformed) with a
# smoother, one free-scaled panel per semester; y limits are set to 0-4.5.
plot.log.MJGPA <- data.file.long %>%
  ggplot(aes(x = LOG_Avg_MJ_current, y = GPA)) +
  geom_point(alpha = 0.3) +
  geom_smooth() +
  ylim(0, 4.5) +
  facet_wrap(~Semester, scales = "free") +
  labs(x = " log MJ consumption during Semester", y = "GPA")

plot.MJGPA <- data.file.long %>%
  ggplot(aes(x = Avg_MJ_current, y = GPA)) +
  geom_point(alpha = 0.3) +
  geom_smooth() +
  ylim(0, 4.5) +
  facet_wrap(~Semester, scales = "free") +
  labs(x = "MJ consumption during Semester", y = "GPA")

plot.log.MJGPA
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 766 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 766 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Render the GPA-vs-untransformed-MJ plot built earlier in this section.
plot.MJGPA
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 766 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 766 rows containing missing values or values outside the scale range
## (`geom_point()`).
The relationship seems to be mostly linear for both transformed and
untransformed MJ usage.
# NOTE(review): `cars` is R's built-in speed/stopping-distance dataset and is
# not used anywhere else in this analysis -- this looks like a leftover from
# the default R Markdown template; confirm and remove.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example: